In [30]:
import pandas as pd
import zipfile
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler


from lazypredict.Supervised import LazyClassifier
from sklearn.preprocessing import OrdinalEncoder

from sklearn.model_selection import train_test_split, cross_validate, KFold
from sklearn.metrics import log_loss
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
import seaborn as sns
from scipy.stats import boxcox
import matplotlib.pyplot as plt
In [2]:
%%capture
!pip install catboost logzero
In [3]:
import pandas as  pd 
import numpy as np 
import os 
import random
import requests, zipfile

from lightgbm import LGBMClassifier
import xgboost as xgb
from catboost import CatBoostClassifier

from sklearn.model_selection import KFold,StratifiedKFold,GroupKFold

from sklearn.metrics import hamming_loss
from logzero import logger

import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns', None)
import warnings
warnings.filterwarnings('ignore')
In [7]:
np.random.seed(42)
In [33]:
zipfile_name = "playground-series-s3e26.zip"
with zipfile.ZipFile(zipfile_name, 'r') as file:
    file.extractall()
In [34]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
sub = pd.read_csv('sample_submission.csv')
In [6]:
import gc
def reduce_mem_usage(df):
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    for col in df.columns:
        col_type = df[col].dtype
        if col_type != object:
                c_min = df[col].min()
                c_max = df[col].max()
                if str(col_type)[:3] == 'int':
                    if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                        df[col] = df[col].astype(np.int8)
                    elif c_min > np.iinfo(np.uint8).min and c_max < np.iinfo(np.uint8).max:
                        df[col] = df[col].astype(np.uint8)
                    elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                        df[col] = df[col].astype(np.int16)
                    elif c_min > np.iinfo(np.uint16).min and c_max < np.iinfo(np.uint16).max:
                        df[col] = df[col].astype(np.uint16)
                    elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                        df[col] = df[col].astype(np.int32)
                    elif c_min > np.iinfo(np.uint32).min and c_max < np.iinfo(np.uint32).max:
                        df[col] = df[col].astype(np.uint32)                    
                    elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                        df[col] = df[col].astype(np.int64)
                    elif c_min > np.iinfo(np.uint64).min and c_max < np.iinfo(np.uint64).max:
                        df[col] = df[col].astype(np.uint64)
                elif str(col_type)[:5] == 'float':
                    if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                        df[col] = df[col].astype(np.float16)
                    elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                        df[col] = df[col].astype(np.float32)
                    else:
                        df[col] = df[col].astype(np.float64)
    
    end_mem = df.memory_usage().sum() / 1024**2
    gc.collect()
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    return df
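One caveat with the helper above: downcasting floats to float16 can introduce visible rounding error on wide-range laboratory columns such as Alk_Phos. A more conservative variant (a sketch only; reduce_mem_usage_safe is not part of the original notebook) relies on pandas' own downcasting, which never goes below float32:

def reduce_mem_usage_safe(df):
    # Downcast integer columns to the smallest integer dtype that fits.
    for col in df.select_dtypes(include=['integer']).columns:
        df[col] = pd.to_numeric(df[col], downcast='integer')
    # Downcast float columns; pandas stops at float32 here.
    for col in df.select_dtypes(include=['float']).columns:
        df[col] = pd.to_numeric(df[col], downcast='float')
    return df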
In [8]:
# Set the seed value all over the place to make this reproducible.
def seed_all(SEED=42):
  random.seed(SEED)

  np.random.seed(SEED)
  os.environ['PYTHONHASHSEED'] = str(SEED)
In [9]:
seed_all()
In [ ]:
class DataProcessing :
  flags = ['ip.dsfield.ecn','ip.flags.df',]
  list_cols = ['ip.len','ip.ttl','tcp.offset','tcp.options.timestamp.tsval','tcp.window_size_value',
                          'tls.handshake.extensions_length','frame.time_epoch']
  tcp_flags = ['CWR','ECE','SYN','ACK','PSH']
  cert_curve_uniques = ['prime256v1','secp384r1']
  def __init__(self,train,test) :
    self.train = train
    self.test  = test

  def process_list_cols(self,data) :
    for col in self.list_cols :
      #cleaning 
      data[col] = data[col].apply(lambda x : x.strip('[').strip(']').split(', '))
      data[col] = data[col].apply(lambda x : ["'0'" if y=="''" else y for y in x])
      data[col] =data[col].apply(lambda x : [float(y.strip("'")) for y in x])
      #statistics features
      data[f'{col}_max'] = data[f'{col}'].apply(lambda x : np.max(x))

      data[f'{col}_min'] = data[f'{col}'].apply(lambda x : np.min(x))

      data[f'{col}_len'] = data[f'{col}'].apply(lambda x : len(x))

      data[f'{col}_mean'] = data[f'{col}'].apply(lambda x : np.mean(x))

      data[f'{col}_median'] = data[f'{col}'].apply(lambda x : np.median(x))

      data[f'{col}_std'] = data[f'{col}'].apply(lambda x : np.std(x))
    
    return data
  
  def process_flags_cols(self,data) :

    for c in self.flags :
      if c=='ip.dsfield.ecn' :
        data[f'{c}'] = data[f'{c}'].apply(lambda x : x.strip('[').strip(']').split(', '))
        data[f'{c}'] = data[f'{c}'].apply(lambda x : [int(y.strip("'")) for y in x])

        data[f'{c}_0_count'] = data[f'{c}'].apply(lambda x : x.count(0) )
        data[f'{c}_2_count'] = data[f'{c}'].apply(lambda x : x.count(2) )

        data[f'{c}_mean'] = data[f'{c}'].apply(lambda x : np.mean(x) )
        data[f'{c}_std'] = data[f'{c}'].apply(lambda x : np.std(x) )
      else :
        data[f'{c}'] = data[f'{c}'].apply(lambda x : x.strip('[').strip(']').split(', '))
        data[f'{c}'] = data[f'{c}'].apply(lambda x : [int(y.strip("'")) for y in x])

        data[f'{c}_0_count'] = data[f'{c}'].apply(lambda x : x.count(0) )
        data[f'{c}_1_count'] = data[f'{c}'].apply(lambda x : x.count(1) )

        data[f'{c}_mean'] = data[f'{c}'].apply(lambda x : np.mean(x) )
        data[f'{c}_std'] = data[f'{c}'].apply(lambda x : np.std(x) )

    return data
  
  def process_tcp_flags(self,data) :
    
    for c in self.tcp_flags :
      data[f'tcp_flag_{c}'] = (data['tcp.flags'] ==c)*1
      data[f'tcp_flag_{c}_count'] = data['tcp.flags'].apply(lambda x : x.count(c))
    return data
  def FE(self,data) :
    data['tcp.options.timestamp.tsval_diff'] = data['tcp.options.timestamp.tsval_max'] - data['tcp.options.timestamp.tsval_min']

    data['packet_directions'] = data['packet_directions'].apply(lambda x : x.strip('[').strip(']').split(', '))
    data['packet_directions_I_count'] = data['packet_directions'].apply(lambda x : x.count("'I'") )

    data['packet_directions_O_count'] = data['packet_directions'].apply(lambda x : x.count("'O'") )

    data['packet_directions_len'] = data['packet_directions'].apply(lambda x : len(x) )
    for c in self.cert_curve_uniques : 
      data[c] = data['cert.curve'].fillna('').str.contains(c)*1

    data['cert.curve_len'] = data['cert.curve'].fillna('').apply(lambda x : len(x))
    #one-hot encode supported TLS versions from the ClientHello
    tls_vers = ['TLS 1.3','TLS 1.2','Reserved (GREASE)','TLS 1.1','TLS 1.0']
    for c in tls_vers :
      data[c] = data['tls.handshake.extensions.supported_version.ch'].str.contains(c, regex=False)*1
    #one-hot encode ALPN protocols from the ClientHello
    tls_ch = ['h2','http/1.1']
    for c in tls_ch :
      data[c] = data['tls.handshake.extensions_alpn_str.ch'].str.contains(c, regex=False)*1
    #Label enc
    sh_mapper = {'http/1.1' :0,'h2':1}
    data['tls.handshake.extensions_alpn_str.sh'] = data['tls.handshake.extensions_alpn_str.sh'].map(sh_mapper)
    #Label Enc
    format_ch_mapper = {"['0']" :0,"['0', '1', '2']":1}
    data['tls.handshake.extensions_ec_point_format.ch'] = data['tls.handshake.extensions_ec_point_format.ch'].map(format_ch_mapper)
    data['tls.handshake.extensions_ec_point_format.sh'] = data['tls.handshake.extensions_ec_point_format.sh'].map(format_ch_mapper)
    #Label Enc
    tls_handshake_mapper = {'TLSv1.2' :0,'TLSv1.3':1}
    data['tls.handshake.version.sh'] = data['tls.handshake.version.sh'].map(tls_handshake_mapper)
    #one-hot encode TLS record versions
    for c in tls_vers :
      data[f'tls.record.version.ch_{c}'] = data['tls.record.version.ch'].str.contains(c, regex=False)*1
    return data

  def get_tf_idf_feats(self,train,test) :
      from sklearn.feature_extraction.text import TfidfVectorizer
      vect = TfidfVectorizer(max_features=10, analyzer='char')
      feats = vect.fit_transform(train['tls.cipher'])
      train = pd.concat([train, pd.DataFrame(feats.todense(), columns=vect.get_feature_names_out())], axis=1)
      test = pd.concat([test, pd.DataFrame(vect.transform(test['tls.cipher']).todense(), columns=vect.get_feature_names_out())], axis=1)
      return train, test

  def process(self) :
    self.train = reduce_mem_usage(self.train)
    self.test  = reduce_mem_usage(self.test)
    logger.info(f"Processing Data")
    self.train = self.process_list_cols(self.train)
    self.test = self.process_list_cols(self.test)

    logger.info(f"Added statistics about tcp tls and ip logs ")
    self.train = self.process_flags_cols(self.train)
    self.test = self.process_flags_cols(self.test)

    logger.info(f"Added flags features ")

    self.train = self.process_tcp_flags(self.train)
    self.test = self.process_tcp_flags(self.test)

    logger.info(f"Added tcp flags features ")
    logger.info(f"Feature Engineering ")

    self.train = self.FE(self.train)
    self.test = self.FE(self.test)
    
    self.train,self.test = self.get_tf_idf_feats(self.train,self.test)
    logger.info(f"Added TF-IDF Features ")
    
    # Get 
        
    self.train = reduce_mem_usage(self.train)
    self.test  = reduce_mem_usage(self.test)
    
    return self.train, self.test
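Note that the DataProcessing class above targets a packet-capture style dataset (ip/tcp/tls columns) and is never invoked in this notebook. A minimal usage sketch, assuming DataFrames traffic_train and traffic_test that actually contain those columns, would be:

# dp = DataProcessing(traffic_train, traffic_test)   # hypothetical pcap-derived frames, not this competition's data
# traffic_train, traffic_test = dp.process()         # runs cleaning, statistics, flag and TF-IDF features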
In [14]:
sns.countplot(data=train, x='Status')
Out[14]:
<Axes: xlabel='Status', ylabel='count'>
In [15]:
n,m = train.shape
In [16]:
id = test.id
In [17]:
y = train.Status
In [18]:
num = ["Bilirubin","Cholesterol" , "Albumin","Copper","Alk_Phos","SGOT","Tryglicerides","Platelets","Prothrombin"]
In [19]:
train[num].std()
Out[19]:
Bilirubin           3.812960
Cholesterol       195.379344
Albumin             0.346171
Copper             75.899266
Alk_Phos         1903.750657
SGOT               48.790945
Tryglicerides      52.530402
Platelets          87.465579
Prothrombin         0.781735
dtype: float64
In [20]:
train_copy  = train[num]
In [21]:
for i in num:
    plt.figure(figsize=(8, 6))  # Adjust the figure size as needed
    sns.boxplot(data=train, x=i)
    plt.title(f'Boxplot for {i}')
    plt.show()
In [ ]:
 
In [22]:
def days_to_years(age_in_days):
    return age_in_days / 365.25



train["Age_y"] = days_to_years(train["Age"]).astype('int')
test["Age_y"] = days_to_years(test["Age"]).astype('int')
In [23]:
status_map = {"D": 0, 'C': 1, 'CL': 2}
In [24]:
# import itertools
# FEATS = ["N_Days",'Bilirubin',"Cholesterol","Albumin","Copper","Alk_Phos","Tryglicerides","SGOT","Platelets","Prothrombin","Age"]
# # Feature engineering
# def feature_engineering(df):

#     # Create new columns representing pairwise sums, quotients, and products
#     for feat1, feat2 in itertools.combinations(FEATS, 2):
#         new_col_name = f'{feat1}_plus_{feat2}'
#         df[new_col_name] = df[feat1] + df[feat2]

#         new_col_name = f'{feat1}_div_{feat2}'
#         df[new_col_name] = df[feat1] / df[feat2]

#         new_col_name = f'{feat1}_times_{feat2}'
#         df[new_col_name] = df[feat1] * df[feat2]

#     return df

# train = feature_engineering(train)
# test = feature_engineering(test)
# print("Feature engineering complete...")
In [25]:
# threshold_platelets = 150
# train['thrombocytopenia'] = np.where(train['Platelets'] < threshold_platelets, 1, 0)
# test['thrombocytopenia'] = np.where(test['Platelets'] < threshold_platelets, 1, 0)



# threshold_alk_phos_upper = 147  # Upper limit of normal range
# threshold_alk_phos_lower = 44   # Lower limit of normal range

# train['elevated_alk_phos'] = np.where((train['Alk_Phos'] > threshold_alk_phos_upper) | (train['Alk_Phos'] < threshold_alk_phos_lower), 1, 0)
# test['elevated_alk_phos'] = np.where((test['Alk_Phos'] > threshold_alk_phos_upper) | (test['Alk_Phos'] < threshold_alk_phos_lower), 1, 0)


# normal_copper_range = (62, 140)

# train['normal_copper'] = np.where((train['Copper'] >= normal_copper_range[0]) & (train['Copper'] <= normal_copper_range[1]), 1, 0)
# test['normal_copper'] = np.where((test['Copper'] >= normal_copper_range[0]) & (test['Copper'] <= normal_copper_range[1]), 1, 0)


# normal_albumin_range = (3.4, 5.4)

# train['normal_albumin'] = np.where((train['Albumin'] >= normal_albumin_range[0]) & (train['Albumin'] <= normal_albumin_range[1]), 1, 0)

# test['normal_albumin'] = np.where((test['Albumin'] >= normal_albumin_range[0]) & (test['Albumin'] <= normal_albumin_range[1]), 1, 0)


# normal_bilirubin_range = (0.2, 1.2)

# train['normal_bilirubin'] = np.where((train['Bilirubin'] >= normal_bilirubin_range[0]) & (train['Bilirubin'] <= normal_bilirubin_range[1]), 1, 0)
# test['normal_bilirubin'] = np.where((test['Bilirubin'] >= normal_bilirubin_range[0]) & (test['Bilirubin'] <= normal_bilirubin_range[1]), 1, 0)


# train['DiagnosisDays'] = train['Age'] - train['N_Days']
# test['DiagnosisDays'] = test['Age'] - test['N_Days']


# train['Age_Group'] = pd.cut(train['Age_y'], bins=[19, 29, 49, 64, 99], labels = [0, 1, 2, 3]).astype('int16')
# test['Age_Group'] = pd.cut(test['Age_y'], bins=[19, 29, 49, 64, 99], labels = [0, 1, 2, 3]).astype('int16')


# train['Bilirubin_Albumin'] =train['Bilirubin'] *train['Albumin']
# test['Bilirubin_Albumin'] =test['Bilirubin'] *test['Albumin']


# train['Diag_Year'] = (train['N_Days'] / 365).astype(int)
# train['Diag_Month'] = ((train['N_Days'] % 365) / 30).astype(int)
# test['Diag_Year'] = (test['N_Days'] / 365).astype(int)
# test['Diag_Month'] = ((test['N_Days'] % 365) / 30).astype(int)


# train['Risk_Score'] = train['Bilirubin'] + train['Albumin'] - train['Alk_Phos']
# test['Risk_Score'] = test['Bilirubin'] + test['Albumin'] - test['Alk_Phos']



# liver_columns = ['Bilirubin', 'Albumin', 'Alk_Phos', 'SGOT']
# train['Liver_Function_Index'] = train[liver_columns].mean(axis=1)
# test['Liver_Function_Index'] = test[liver_columns].mean(axis=1)
In [35]:
AllData = pd.concat([train,test],axis=0).drop(['id'],axis=1)
In [36]:
numerical_features = [i for i in AllData.columns if AllData[i].dtype != 'object']
num_copy = numerical_features.copy()
num_copy.remove('Stage')
num_copy.remove('N_Days')
num_copy.remove('Age')
In [37]:
AllData
Out[37]:
N_Days Drug Age Sex Ascites Hepatomegaly Spiders Edema Bilirubin Cholesterol Albumin Copper Alk_Phos SGOT Tryglicerides Platelets Prothrombin Stage Status
0 999 D-penicillamine 21532 M N N N N 2.30 316.00 3.35 172.00 1601.00 179.80 63.00 394.00 9.70 3.00 D
1 2574 Placebo 19237 F N N N N 0.90 364.00 3.54 63.00 1440.00 134.85 88.00 361.00 11.00 3.00 C
2 3428 Placebo 13727 F N Y Y Y 3.30 299.00 3.55 131.00 1029.00 119.35 50.00 199.00 11.70 4.00 D
3 2576 Placebo 18460 F N N N N 0.60 256.00 3.50 58.00 1653.00 71.30 96.00 269.00 10.70 3.00 C
4 788 Placebo 16658 F N Y N N 1.10 346.00 3.65 63.00 1181.00 125.55 96.00 298.00 10.60 4.00 C
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5266 2870 Placebo 12279 F N N N N 1.30 302.00 3.43 75.00 1345.00 145.00 44.00 181.00 10.60 3.00 NaN
5267 1770 Placebo 24803 F N N N N 0.50 219.00 4.09 121.00 663.00 79.05 94.00 311.00 9.70 3.00 NaN
5268 3707 D-penicillamine 16990 F N Y N N 0.80 315.00 4.09 13.00 1637.00 170.50 70.00 426.00 10.90 3.00 NaN
5269 1216 Placebo 11773 F N N N N 0.70 329.00 3.80 52.00 678.00 57.00 126.00 306.00 10.20 1.00 NaN
5270 2272 D-penicillamine 21600 F N N N N 2.00 232.00 3.42 18.00 1636.00 170.50 83.00 213.00 13.60 2.00 NaN

13176 rows × 19 columns

In [38]:
skewed = ['Bilirubin','Cholesterol',"Alk_Phos","Copper","Prothrombin",'SGOT','Tryglicerides','Albumin']

for i in skewed : 
    transformed_data, lambda_value = boxcox(AllData[i])
    AllData[i] = transformed_data

train = AllData.iloc[:n]
test = AllData.iloc[n:]
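scipy's boxcox only accepts strictly positive inputs, so the loop above raises a ValueError if any of the skewed columns contains zeros or negatives. A defensive variant (a sketch only, not what was run in this notebook) shifts such a column into positive range before transforming:

from scipy.stats import boxcox

for i in skewed:
    col = AllData[i]
    shift = 0.0
    if (col <= 0).any():
        shift = 1e-6 - col.min()  # make the minimum a small positive value
    transformed, _lmbda = boxcox(col + shift)
    AllData[i] = transformed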
In [20]:
def remove_outliers(data, threshold=3):
    mean_value = np.mean(data)
    std_dev = np.std(data)
    
    # Define the lower and upper bounds for outliers
    lower_bound = mean_value - threshold * std_dev
    upper_bound = mean_value + threshold * std_dev
    
    # Keep only the data points within the bounds
    data_no_outliers = data[(data >= lower_bound) & (data <= upper_bound)]
    
    return data_no_outliers

# Note: because of index alignment, values outside the 3-sigma bounds become NaN here rather than being dropped row-wise.
train[num] = train_copy.apply(remove_outliers)
In [21]:
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, OneHotEncoder
In [22]:
encoders = {
    'Drug': OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1, categories=[['Placebo', 'D-penicillamine']]),
    'Sex': OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
    'Ascites': OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
    'Hepatomegaly': OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
    'Spiders': OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
    # 'Edema': OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1, categories=[['N', 'S', 'Y']]),
    'Edema': OneHotEncoder(),
    'Stage': OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
}
In [23]:
for feat, enc in encoders.items():
    if isinstance(enc, OrdinalEncoder):
        train[feat] = enc.fit_transform(train[[feat]]).astype('int32')
        test[feat] = enc.transform(test[[feat]]).astype('int32')
    if isinstance(enc, OneHotEncoder):
        # Transform and get new column names
        new_cols = enc.fit_transform(train[[feat]]).toarray().astype('int8')
        # col_names = [f"{feat}_{cat}" for cat in enc.categories_[0]]
        col_names = enc.get_feature_names_out()
        
        # Add new columns to the dataframe
        train[col_names] = new_cols
        train.drop(feat, axis=1, inplace=True)  # Drop original column
        
        # Repeat for the test set
        new_cols_test = enc.transform(test[[feat]]).toarray().astype('int8')
        test[col_names] = new_cols_test
        test.drop(feat, axis=1, inplace=True)
In [24]:
map_dict = {"Y" :1 , "N" : 0,"S" : -1}
Y_N_col = ['Ascites',"Hepatomegaly","Spiders","Edema"]

AllData[Y_N_col] = AllData[Y_N_col].apply(lambda x : x.map(map_dict))
In [25]:
train.columns
Out[25]:
Index(['N_Days', 'Drug', 'Age', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders',
       'Bilirubin', 'Cholesterol', 'Albumin', 'Copper', 'Alk_Phos', 'SGOT',
       'Tryglicerides', 'Platelets', 'Prothrombin', 'Stage', 'Status', 'Age_y',
       'Edema_N', 'Edema_S', 'Edema_Y'],
      dtype='object')
In [26]:
train['Desease_count']  = np.abs(train['Ascites']) + np.abs(train['Hepatomegaly']) + np.abs(train['Spiders'])  +  np.abs(train['Edema_Y'])
test['Desease_count']  = np.abs(test['Ascites']) + np.abs(test['Hepatomegaly']) + np.abs(test['Spiders'])  +  np.abs(test['Edema_Y'])
In [28]:
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
In [29]:
class DiagnosisDateTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        X['Diagnosis_Date'] = X['Age'] - X['N_Days']
        return X
    
class AgeYearsTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        X['Age_Years'] = round(X['Age'] / 365.25).astype("int16")
        return X

class AgeGroupsTransformer(BaseEstimator, TransformerMixin):
    """Older people might be hit harder (interaction) by health issues. Also can cover lifestyle influences, i.e.
    alcohol consumption etc."""
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        # Use years from above, min=26, max=78
        X['Age_Group'] = pd.cut(X['Age_Years'], bins=[19, 29, 49, 64, 99], labels = [0, 1, 2, 3]).astype('int16')
        return X

class BilirubinAlbuminTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        X['Bilirubin_Albumin'] = X['Bilirubin'] * X['Albumin']
        return X

class DrugEffectivenessTransformer(BaseEstimator, TransformerMixin):
    # Placeholder concept, assuming 'Bilirubin' improvement is a measure of effectiveness
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        X['Drug_Effectiveness'] = X['Drug'] * X['Bilirubin']
        return X

class SymptomScoreTransformer(BaseEstimator, TransformerMixin):
    # From data set explanations above let's add all the "bad" symptoms
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        # symptom_columns = ['Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
        symptom_columns = ['Ascites', 'Hepatomegaly', 'Spiders', 'Edema_N', 'Edema_S', 'Edema_Y']
        X['Symptom_Score'] = X[symptom_columns].sum(axis=1)
        return X
    
class SymptomCatTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.symptom_columns = ['Ascites', 'Hepatomegaly', 'Spiders', 'Edema_N', 'Edema_S', 'Edema_Y']
        self.encoder = OneHotEncoder(handle_unknown='ignore')

    def fit(self, X, y=None):
        X_copy = X.copy()
        symptom_scores = X_copy[self.symptom_columns].apply(lambda row: ''.join(row.values.astype(str)), axis=1)
        self.encoder.fit(symptom_scores.values.reshape(-1, 1))
        return self

    def transform(self, X):
        X_transformed = X.copy()
        symptom_scores = X_transformed[self.symptom_columns].apply(lambda row: ''.join(row.values.astype(str)), axis=1)
        
        encoded_features = self.encoder.transform(symptom_scores.values.reshape(-1, 1)).toarray().astype("int8")
        encoded_feature_names = self.encoder.get_feature_names_out(input_features=['Symptom_Score'])

        # Drop the original symptom columns and add the new encoded features
        # X_transformed.drop(columns=self.symptom_columns, inplace=True)
        X_transformed[encoded_feature_names] = pd.DataFrame(encoded_features, index=X_transformed.index)
        
        return X_transformed


class LiverFunctionTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        liver_columns = ['Bilirubin', 'Albumin', 'Alk_Phos', 'SGOT']
        X['Liver_Function_Index'] = X[liver_columns].mean(axis=1)
        return X

class RiskScoreTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        X['Risk_Score'] = X['Bilirubin'] + X['Albumin'] - X['Alk_Phos']
        return X

class TimeFeaturesTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        X['Diag_Year'] = (X['N_Days'] / 365).astype(int)
        X['Diag_Month'] = ((X['N_Days'] % 365) / 30).astype(int)
        return X
    
class ScalingTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.scaler = StandardScaler()
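        # NUM_FEATS (the base numeric column names) is assumed to be defined earlier in the notebook; it does not appear in this section.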
        self.num_feats = NUM_FEATS + ['Diagnosis_Date', 'Age_Years', 'Bilirubin_Albumin', 'Drug_Effectiveness', 
                                      'Symptom_Score', 'Liver_Function_Index', 'Risk_Score', 'Diag_Year', 'Diag_Month']

    def fit(self, X, y=None):
        self.scaler.fit(X[self.num_feats])
        return self

    def transform(self, X):
        X_scaled = X.copy()
        X_scaled[self.num_feats] = self.scaler.transform(X_scaled[self.num_feats])
        return X_scaled

# Define the pipeline
pipeline = Pipeline([
    ('diagnosis_date', DiagnosisDateTransformer()),
    ('age_years', AgeYearsTransformer()),
    ('age_groups', AgeGroupsTransformer()),
    ('bilirubin_albumin', BilirubinAlbuminTransformer()),
    ('drug_effectiveness', DrugEffectivenessTransformer()),
    ('symptom_score', SymptomScoreTransformer()),
    ('symptom_cat_score', SymptomCatTransformer()),
    ('liver_function', LiverFunctionTransformer()),
    ('risk_score', RiskScoreTransformer()),
    ('time_features', TimeFeaturesTransformer()),
    #('scaling', ScalingTransformer()),
    # ... ?
])

# Apply the pipeline to your dataframes
train = pipeline.fit_transform(train)
test = pipeline.transform(test)

# Update the CAT_FEATS
# CAT_FEATS = ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema', 'Stage', #old
#              'Age_Group', 'Symptom_Score'] # new 
# # Update the NUM_FEATS ????
In [30]:
AllData = pd.concat([train,test],axis=0)
group_by_cols = ['Stage' , 'Drug'  , 'Sex' , 'Ascites','Hepatomegaly','Spiders',]
aggregation_strategies = ['mean' , 'max' , 'min' , 'std']
cols_to_agg = ["Bilirubin","N_Days","Cholesterol","Albumin","Copper","Alk_Phos","SGOT","Tryglicerides","Platelets","Prothrombin"]


sep = train.shape[0]

for col_to_agg in cols_to_agg:
    for col in group_by_cols:
        for strategy in aggregation_strategies:
            AllData[f'{col_to_agg} {strategy} by {col}'] = AllData.groupby(col)[col_to_agg].transform(strategy)
            

AllData.drop(group_by_cols,axis=1,inplace=True)
# encoder = OrdinalEncoder()
# df[group_by_cols] = encoder.fit_transform(df[group_by_cols])
train = AllData[: sep]
test = AllData[sep :].drop('Status',axis=1)
In [31]:
X = train.drop("Status" , axis = 1 )
y = y.map(status_map)
X_train ,X_test , y_train,y_test = train_test_split(X,y,test_size=0.2)
In [32]:
LazyClassifier().fit(X_train ,X_test , y_train,y_test)
 97%|███████████████████████████████████████████████████████████████████████████████▏  | 28/29 [00:31<00:00,  1.79it/s]
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000371 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3163
[LightGBM] [Info] Number of data points in the train set: 6324, number of used features: 45
[LightGBM] [Info] Start training from score -1.101463
[LightGBM] [Info] Start training from score -0.458308
[LightGBM] [Info] Start training from score -3.344935
100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:31<00:00,  1.09s/it]
Out[32]:
(                               Accuracy  Balanced Accuracy ROC AUC  F1 Score  \
 Model                                                                          
 LGBMClassifier                     0.83               0.62    None      0.82   
 NearestCentroid                    0.68               0.62    None      0.73   
 XGBClassifier                      0.83               0.61    None      0.82   
 AdaBoostClassifier                 0.81               0.60    None      0.80   
 BaggingClassifier                  0.81               0.59    None      0.80   
 RandomForestClassifier             0.83               0.59    None      0.81   
 BernoulliNB                        0.72               0.57    None      0.74   
 ExtraTreesClassifier               0.82               0.57    None      0.80   
 PassiveAggressiveClassifier        0.73               0.56    None      0.75   
 LabelSpreading                     0.74               0.55    None      0.74   
 DecisionTreeClassifier             0.73               0.55    None      0.73   
 LabelPropagation                   0.73               0.55    None      0.73   
 KNeighborsClassifier               0.78               0.53    None      0.77   
 SGDClassifier                      0.80               0.53    None      0.78   
 LogisticRegression                 0.80               0.53    None      0.78   
 SVC                                0.80               0.53    None      0.78   
 LinearSVC                          0.79               0.52    None      0.78   
 Perceptron                         0.77               0.52    None      0.76   
 CalibratedClassifierCV             0.79               0.52    None      0.78   
 LinearDiscriminantAnalysis         0.79               0.52    None      0.77   
 RidgeClassifier                    0.79               0.52    None      0.77   
 RidgeClassifierCV                  0.79               0.52    None      0.77   
 ExtraTreeClassifier                0.70               0.50    None      0.70   
 GaussianNB                         0.13               0.40    None      0.17   
 QuadraticDiscriminantAnalysis      0.19               0.39    None      0.26   
 DummyClassifier                    0.61               0.33    None      0.46   
 
                                Time Taken  
 Model                                      
 LGBMClassifier                       0.36  
 NearestCentroid                      0.03  
 XGBClassifier                        0.36  
 AdaBoostClassifier                   0.84  
 BaggingClassifier                    1.58  
 RandomForestClassifier               2.62  
 BernoulliNB                          0.05  
 ExtraTreesClassifier                 1.24  
 PassiveAggressiveClassifier          0.06  
 LabelSpreading                       2.49  
 DecisionTreeClassifier               0.21  
 LabelPropagation                     2.13  
 KNeighborsClassifier                 0.19  
 SGDClassifier                        0.16  
 LogisticRegression                   0.10  
 SVC                                  1.80  
 LinearSVC                            2.81  
 Perceptron                           0.05  
 CalibratedClassifierCV              14.14  
 LinearDiscriminantAnalysis           0.07  
 RidgeClassifier                      0.03  
 RidgeClassifierCV                    0.05  
 ExtraTreeClassifier                  0.03  
 GaussianNB                           0.04  
 QuadraticDiscriminantAnalysis        0.04  
 DummyClassifier                      0.02  ,
                                Accuracy  Balanced Accuracy ROC AUC  F1 Score  \
 Model                                                                          
 LGBMClassifier                     0.83               0.62    None      0.82   
 NearestCentroid                    0.68               0.62    None      0.73   
 XGBClassifier                      0.83               0.61    None      0.82   
 AdaBoostClassifier                 0.81               0.60    None      0.80   
 BaggingClassifier                  0.81               0.59    None      0.80   
 RandomForestClassifier             0.83               0.59    None      0.81   
 BernoulliNB                        0.72               0.57    None      0.74   
 ExtraTreesClassifier               0.82               0.57    None      0.80   
 PassiveAggressiveClassifier        0.73               0.56    None      0.75   
 LabelSpreading                     0.74               0.55    None      0.74   
 DecisionTreeClassifier             0.73               0.55    None      0.73   
 LabelPropagation                   0.73               0.55    None      0.73   
 KNeighborsClassifier               0.78               0.53    None      0.77   
 SGDClassifier                      0.80               0.53    None      0.78   
 LogisticRegression                 0.80               0.53    None      0.78   
 SVC                                0.80               0.53    None      0.78   
 LinearSVC                          0.79               0.52    None      0.78   
 Perceptron                         0.77               0.52    None      0.76   
 CalibratedClassifierCV             0.79               0.52    None      0.78   
 LinearDiscriminantAnalysis         0.79               0.52    None      0.77   
 RidgeClassifier                    0.79               0.52    None      0.77   
 RidgeClassifierCV                  0.79               0.52    None      0.77   
 ExtraTreeClassifier                0.70               0.50    None      0.70   
 GaussianNB                         0.13               0.40    None      0.17   
 QuadraticDiscriminantAnalysis      0.19               0.39    None      0.26   
 DummyClassifier                    0.61               0.33    None      0.46   
 
                                Time Taken  
 Model                                      
 LGBMClassifier                       0.36  
 NearestCentroid                      0.03  
 XGBClassifier                        0.36  
 AdaBoostClassifier                   0.84  
 BaggingClassifier                    1.58  
 RandomForestClassifier               2.62  
 BernoulliNB                          0.05  
 ExtraTreesClassifier                 1.24  
 PassiveAggressiveClassifier          0.06  
 LabelSpreading                       2.49  
 DecisionTreeClassifier               0.21  
 LabelPropagation                     2.13  
 KNeighborsClassifier                 0.19  
 SGDClassifier                        0.16  
 LogisticRegression                   0.10  
 SVC                                  1.80  
 LinearSVC                            2.81  
 Perceptron                           0.05  
 CalibratedClassifierCV              14.14  
 LinearDiscriminantAnalysis           0.07  
 RidgeClassifier                      0.03  
 RidgeClassifierCV                    0.05  
 ExtraTreeClassifier                  0.03  
 GaussianNB                           0.04  
 QuadraticDiscriminantAnalysis        0.04  
 DummyClassifier                      0.02  )
In [33]:
model2 = XGBClassifier().fit(X_train,y_train)
In [34]:
feature_importances = model2.feature_importances_
feature_names = model2.feature_names_in_
feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

# Plot feature importances
plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feature_importance_df, palette='viridis')
plt.title('XGBoost - Feature Importances')
plt.show()
In [39]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Assuming you have already calculated feature importances and created feature_importance_df

# Filter features with importance greater than 0
selected_features = feature_importance_df[feature_importance_df['Importance'] > 0]
In [40]:
X = X[selected_features.Feature]
test = test[selected_features.Feature]
In [41]:
X_train,X_test , y_train,y_test = train_test_split(X, y ,test_size= 0.2)
In [45]:
import optuna
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
import numpy as np

# Assuming you have X_train, y_train for your multiclass classification problem

# Define the objective function for Optuna
def objective(trial):
    # Split the data into training and validation sets
    X_valid = X_test
    y_valid = y_test

    # Define the XGBoost parameters to be optimized
    params = {
        'objective': 'multi:softprob',  # multiclass with probability outputs (needed for log loss)
        'num_class': len(np.unique(y_train)),  # Number of classes
        'booster': trial.suggest_categorical('booster', ['gbtree', 'gblinear', 'dart']),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'lambda': trial.suggest_float('lambda', 1e-4, 1.0),
        'alpha': trial.suggest_float('alpha', 1e-4, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'random_state': 42,
        'n_jobs': -1
    }

    # Create and train the XGBoost model
    model = xgb.XGBClassifier(**params)
    model.fit(X_train, y_train)

    # Predict on the validation set
    y_pred = model.predict_proba(X_valid)

    # Calculate log loss
    loss = log_loss(y_valid, y_pred)

    return loss

# Create a study object and optimize the objective function
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# Print the best parameters and their values
print('Number of finished trials: ', len(study.trials))
print('Best trial:')
trial = study.best_trial

print('Value: ', trial.value)
print('Params: ')
for key, value in trial.params.items():
    print(f'    {key}: {value}')
[I 2023-12-18 19:35:32,012] A new study created in memory with name: no-name-3088f359-7138-4803-9937-2aad132cfa77
[I 2023-12-18 19:35:46,029] Trial 0 finished with value: 0.467901439351651 and parameters: {'booster': 'dart', 'learning_rate': 0.12746943553980886, 'max_depth': 10, 'subsample': 0.6346355421802843, 'colsample_bytree': 0.816641272904935, 'lambda': 0.037217420773450346, 'alpha': 0.6762667042644042, 'min_child_weight': 6}. Best is trial 0 with value: 0.467901439351651.
[I 2023-12-18 19:35:46,189] Trial 1 finished with value: 0.6037975063023779 and parameters: {'booster': 'gblinear', 'learning_rate': 0.2991729870211477, 'max_depth': 10, 'subsample': 0.94887532050163, 'colsample_bytree': 0.6405892872125224, 'lambda': 0.312355003411337, 'alpha': 0.3507935732407453, 'min_child_weight': 2}. Best is trial 0 with value: 0.467901439351651.
[I 2023-12-18 19:35:46,541] Trial 2 finished with value: 0.46298152684355603 and parameters: {'booster': 'gbtree', 'learning_rate': 0.26953782341788446, 'max_depth': 5, 'subsample': 0.892787988701748, 'colsample_bytree': 0.9460900209863112, 'lambda': 0.48339904513109605, 'alpha': 0.24895141551110403, 'min_child_weight': 6}. Best is trial 2 with value: 0.46298152684355603.
[I 2023-12-18 19:35:47,001] Trial 3 finished with value: 0.468967842953452 and parameters: {'booster': 'gbtree', 'learning_rate': 0.1968570716692036, 'max_depth': 7, 'subsample': 0.6968008794555788, 'colsample_bytree': 0.9473767444269642, 'lambda': 0.9425810072775328, 'alpha': 0.47998117466167245, 'min_child_weight': 9}. Best is trial 2 with value: 0.46298152684355603.
[I 2023-12-18 19:35:47,742] Trial 4 finished with value: 0.553333358347255 and parameters: {'booster': 'gbtree', 'learning_rate': 0.22217501232681033, 'max_depth': 9, 'subsample': 0.5800458884599016, 'colsample_bytree': 0.9774549778662086, 'lambda': 0.23504152818580556, 'alpha': 0.21660861802534212, 'min_child_weight': 2}. Best is trial 2 with value: 0.46298152684355603.
[I 2023-12-18 19:35:48,368] Trial 5 finished with value: 0.5317804045845647 and parameters: {'booster': 'gbtree', 'learning_rate': 0.018117294927103383, 'max_depth': 7, 'subsample': 0.8699289173492628, 'colsample_bytree': 0.9189739682388844, 'lambda': 0.25347600798975467, 'alpha': 0.6081935011695926, 'min_child_weight': 6}. Best is trial 2 with value: 0.46298152684355603.
[I 2023-12-18 19:35:48,533] Trial 6 finished with value: 0.604705510005819 and parameters: {'booster': 'gblinear', 'learning_rate': 0.1425963716034376, 'max_depth': 3, 'subsample': 0.7847696219414522, 'colsample_bytree': 0.9313012161103533, 'lambda': 0.0808138985128455, 'alpha': 0.14907754293911185, 'min_child_weight': 1}. Best is trial 2 with value: 0.46298152684355603.
[I 2023-12-18 19:36:02,829] Trial 7 finished with value: 0.4455269381351693 and parameters: {'booster': 'dart', 'learning_rate': 0.06401154936301488, 'max_depth': 5, 'subsample': 0.8219892269754658, 'colsample_bytree': 0.7577975087536928, 'lambda': 0.5601645798142655, 'alpha': 0.37497662789569375, 'min_child_weight': 5}. Best is trial 7 with value: 0.4455269381351693.
[I 2023-12-18 19:36:17,008] Trial 8 finished with value: 0.4471619168379527 and parameters: {'booster': 'dart', 'learning_rate': 0.18518614626695104, 'max_depth': 4, 'subsample': 0.5772833539917517, 'colsample_bytree': 0.8790190343647026, 'lambda': 0.014618704252580753, 'alpha': 0.7750933280799106, 'min_child_weight': 6}. Best is trial 7 with value: 0.4455269381351693.
[I 2023-12-18 19:36:31,410] Trial 9 finished with value: 0.4502961995945259 and parameters: {'booster': 'dart', 'learning_rate': 0.06623971620335108, 'max_depth': 10, 'subsample': 0.8259893721747179, 'colsample_bytree': 0.538621853070705, 'lambda': 0.7644821198823564, 'alpha': 0.04035767654397569, 'min_child_weight': 8}. Best is trial 7 with value: 0.4455269381351693.
[I 2023-12-18 19:36:45,577] Trial 10 finished with value: 0.4394526359630778 and parameters: {'booster': 'dart', 'learning_rate': 0.09714866603753744, 'max_depth': 5, 'subsample': 0.9983406881809331, 'colsample_bytree': 0.7233332089220278, 'lambda': 0.6518618137378873, 'alpha': 0.9956815364834396, 'min_child_weight': 4}. Best is trial 10 with value: 0.4394526359630778.
[I 2023-12-18 19:36:59,174] Trial 11 finished with value: 0.4420806868129429 and parameters: {'booster': 'dart', 'learning_rate': 0.07513331116676263, 'max_depth': 5, 'subsample': 0.9758312511857521, 'colsample_bytree': 0.7334186153904888, 'lambda': 0.630752397615246, 'alpha': 0.9445299618185905, 'min_child_weight': 4}. Best is trial 10 with value: 0.4394526359630778.
[I 2023-12-18 19:37:14,029] Trial 12 finished with value: 0.43921844570171875 and parameters: {'booster': 'dart', 'learning_rate': 0.10401893859068073, 'max_depth': 6, 'subsample': 0.9969018827861796, 'colsample_bytree': 0.6923568436041777, 'lambda': 0.6648439228201306, 'alpha': 0.9902308244863821, 'min_child_weight': 4}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:37:28,516] Trial 13 finished with value: 0.44245942822750606 and parameters: {'booster': 'dart', 'learning_rate': 0.11324890072656754, 'max_depth': 6, 'subsample': 0.995416191138221, 'colsample_bytree': 0.6738320949753753, 'lambda': 0.7540313969577961, 'alpha': 0.9457505113045466, 'min_child_weight': 4}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:37:42,910] Trial 14 finished with value: 0.4621930920197788 and parameters: {'booster': 'dart', 'learning_rate': 0.1107416117233408, 'max_depth': 8, 'subsample': 0.928078684290883, 'colsample_bytree': 0.6090522222998729, 'lambda': 0.47879344029572646, 'alpha': 0.9932511796552741, 'min_child_weight': 3}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:37:57,967] Trial 15 finished with value: 0.4474133569466753 and parameters: {'booster': 'dart', 'learning_rate': 0.16567599644396172, 'max_depth': 6, 'subsample': 0.9894364006758499, 'colsample_bytree': 0.7156396424950335, 'lambda': 0.7066524721845684, 'alpha': 0.8202046828158043, 'min_child_weight': 4}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:37:58,148] Trial 16 finished with value: 0.6316962115326096 and parameters: {'booster': 'gblinear', 'learning_rate': 0.08711852917187111, 'max_depth': 3, 'subsample': 0.9049839106948524, 'colsample_bytree': 0.8025284576951568, 'lambda': 0.9638207834509299, 'alpha': 0.8538986216501384, 'min_child_weight': 10}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:38:12,686] Trial 17 finished with value: 0.5068721519517998 and parameters: {'booster': 'dart', 'learning_rate': 0.02562611100062981, 'max_depth': 4, 'subsample': 0.7374955773614293, 'colsample_bytree': 0.6812435384955904, 'lambda': 0.8447882061363468, 'alpha': 0.7104241446438149, 'min_child_weight': 8}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:38:27,084] Trial 18 finished with value: 0.46975695094155423 and parameters: {'booster': 'dart', 'learning_rate': 0.14460317257947905, 'max_depth': 8, 'subsample': 0.9423238114748542, 'colsample_bytree': 0.5885460205993664, 'lambda': 0.6251247753398984, 'alpha': 0.8795546103233672, 'min_child_weight': 3}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:38:27,273] Trial 19 finished with value: 0.629388936566327 and parameters: {'booster': 'gblinear', 'learning_rate': 0.11380824652881756, 'max_depth': 6, 'subsample': 0.8663594374183559, 'colsample_bytree': 0.7854091512126957, 'lambda': 0.8508191849820892, 'alpha': 0.9865412812063501, 'min_child_weight': 5}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:38:41,654] Trial 20 finished with value: 0.46607702807814266 and parameters: {'booster': 'dart', 'learning_rate': 0.0442070376743758, 'max_depth': 4, 'subsample': 0.9411599119909108, 'colsample_bytree': 0.5076933477492662, 'lambda': 0.41763765780594386, 'alpha': 0.7633907482071974, 'min_child_weight': 7}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:38:56,333] Trial 21 finished with value: 0.4426561686027803 and parameters: {'booster': 'dart', 'learning_rate': 0.08473267509648494, 'max_depth': 5, 'subsample': 0.9981744676865851, 'colsample_bytree': 0.7268553158627814, 'lambda': 0.6181456391660245, 'alpha': 0.8947372896442928, 'min_child_weight': 4}. Best is trial 12 with value: 0.43921844570171875.
[I 2023-12-18 19:39:10,268] Trial 22 finished with value: 0.4387168740928328 and parameters: {'booster': 'dart', 'learning_rate': 0.09748262983754383, 'max_depth': 5, 'subsample': 0.9710653610936847, 'colsample_bytree': 0.7046711478715056, 'lambda': 0.6461469944331844, 'alpha': 0.989009892940536, 'min_child_weight': 3}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:39:25,689] Trial 23 finished with value: 0.4504070528951304 and parameters: {'booster': 'dart', 'learning_rate': 0.09474700925189229, 'max_depth': 7, 'subsample': 0.956168731989341, 'colsample_bytree': 0.6775801163040833, 'lambda': 0.7183133953251563, 'alpha': 0.8856805130297458, 'min_child_weight': 3}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:39:40,363] Trial 24 finished with value: 0.4519329755242491 and parameters: {'booster': 'dart', 'learning_rate': 0.05132492278698467, 'max_depth': 6, 'subsample': 0.9083787931721038, 'colsample_bytree': 0.763918252688886, 'lambda': 0.5379270236495117, 'alpha': 0.9713820831248559, 'min_child_weight': 1}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:39:54,832] Trial 25 finished with value: 0.4426732569935103 and parameters: {'booster': 'dart', 'learning_rate': 0.09905215127466048, 'max_depth': 4, 'subsample': 0.9574814336764771, 'colsample_bytree': 0.7019388853936809, 'lambda': 0.6482937428738084, 'alpha': 0.7900471797463317, 'min_child_weight': 2}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:40:09,052] Trial 26 finished with value: 0.4394620463877227 and parameters: {'booster': 'dart', 'learning_rate': 0.13041251799441955, 'max_depth': 5, 'subsample': 0.9932974327043721, 'colsample_bytree': 0.641671423409493, 'lambda': 0.5770069435779102, 'alpha': 0.9998628691744464, 'min_child_weight': 5}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:40:23,845] Trial 27 finished with value: 0.45148806423476734 and parameters: {'booster': 'dart', 'learning_rate': 0.07792708214629346, 'max_depth': 8, 'subsample': 0.9258464613597038, 'colsample_bytree': 0.8289379921512159, 'lambda': 0.6807325204530122, 'alpha': 0.8888730191604699, 'min_child_weight': 3}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:40:24,335] Trial 28 finished with value: 0.4577889685665366 and parameters: {'booster': 'gbtree', 'learning_rate': 0.04019005353248907, 'max_depth': 6, 'subsample': 0.8866388780300798, 'colsample_bytree': 0.7727188267725985, 'lambda': 0.7985549687778268, 'alpha': 0.8228925956150835, 'min_child_weight': 4}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:40:24,535] Trial 29 finished with value: 0.6258411623905882 and parameters: {'booster': 'gblinear', 'learning_rate': 0.1230665111221104, 'max_depth': 4, 'subsample': 0.9621530426609177, 'colsample_bytree': 0.7402705876338667, 'lambda': 0.6952247137716429, 'alpha': 0.6623240856405609, 'min_child_weight': 5}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:40:39,298] Trial 30 finished with value: 0.4512572866739116 and parameters: {'booster': 'dart', 'learning_rate': 0.09907482739672191, 'max_depth': 7, 'subsample': 0.5117261460658773, 'colsample_bytree': 0.8302793199804653, 'lambda': 0.5683541569798679, 'alpha': 0.7394600251050194, 'min_child_weight': 2}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:40:53,511] Trial 31 finished with value: 0.43987648225042597 and parameters: {'booster': 'dart', 'learning_rate': 0.1372664551876506, 'max_depth': 5, 'subsample': 0.9892848164159387, 'colsample_bytree': 0.6563127862287073, 'lambda': 0.5709686207733861, 'alpha': 0.9988333868754334, 'min_child_weight': 5}. Best is trial 22 with value: 0.4387168740928328.
[I 2023-12-18 19:41:07,550] Trial 32 finished with value: 0.43806385797710357 and parameters: {'booster': 'dart', 'learning_rate': 0.12534520470092694, 'max_depth': 5, 'subsample': 0.9967398584994234, 'colsample_bytree': 0.6466917057968393, 'lambda': 0.42819438565674883, 'alpha': 0.9262582478094988, 'min_child_weight': 7}. Best is trial 32 with value: 0.43806385797710357.
[I 2023-12-18 19:41:21,750] Trial 33 finished with value: 0.4405562684137925 and parameters: {'booster': 'dart', 'learning_rate': 0.12329669755176835, 'max_depth': 5, 'subsample': 0.9630309766491958, 'colsample_bytree': 0.7091839237473331, 'lambda': 0.40841548907495945, 'alpha': 0.9289515047298709, 'min_child_weight': 7}. Best is trial 32 with value: 0.43806385797710357.
[W 2023-12-18 19:41:22,994] Trial 34 failed with parameters: {'booster': 'dart', 'learning_rate': 0.1046316520251173, 'max_depth': 6, 'subsample': 0.9240501401593115, 'colsample_bytree': 0.6226575295516056, 'lambda': 0.48299441560410866, 'alpha': 0.9274487250155293, 'min_child_weight': 7} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Anaconda\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\user\AppData\Local\Temp\ipykernel_10052\3999089154.py", line 33, in objective
    model.fit(X_train, y_train)
  File "C:\Anaconda\Lib\site-packages\xgboost\core.py", line 729, in inner_f
    return func(**kwargs)
           ^^^^^^^^^^^^^^
  File "C:\Anaconda\Lib\site-packages\xgboost\sklearn.py", line 1515, in fit
    self._Booster = train(
                    ^^^^^^
  File "C:\Anaconda\Lib\site-packages\xgboost\core.py", line 729, in inner_f
    return func(**kwargs)
           ^^^^^^^^^^^^^^
  File "C:\Anaconda\Lib\site-packages\xgboost\training.py", line 181, in train
    bst.update(dtrain, i, obj)
  File "C:\Anaconda\Lib\site-packages\xgboost\core.py", line 2050, in update
    _LIB.XGBoosterUpdateOneIter(
KeyboardInterrupt
[W 2023-12-18 19:41:23,021] Trial 34 failed with value None.

KeyboardInterrupt

In [42]:
xgb_params ={'max_depth': 10,
         'min_child_weight': 7,
         'learning_rate': 0.03419253503641095,
         'n_estimators': 472, 
         'subsample': 0.8843005833909504,
         'colsample_bytree': 0.0966352677605082,
         'random_state': 42, 
         'tree_method': 'hist', 
        'eval_metric': 'mlogloss', 
          'device' : 'cuda',
        'verbosity': 2, }




model3 = XGBClassifier(**xgb_params).fit(X_train,y_train)
y_hat = model3.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
#Log Loss: 0.4349277405119213
#Log Loss: 0.4323600045757931
#Log Loss: 0.4292475293903112
Log Loss: 0.4329640144159542
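A single hold-out split makes this log-loss estimate fairly noisy. A stratified K-fold estimate (a sketch reusing X, y, classes and xgb_params from above; it was not run in the original notebook) could look like:

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
import numpy as np

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_losses = []
for tr_idx, va_idx in skf.split(X, y):
    # Fit on the training folds, score on the held-out fold
    fold_model = XGBClassifier(**xgb_params).fit(X.iloc[tr_idx], y.iloc[tr_idx])
    proba = fold_model.predict_proba(X.iloc[va_idx])
    fold_losses.append(log_loss(y.iloc[va_idx], proba, labels=classes))
print("CV Log Loss:", np.mean(fold_losses))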
In [43]:
from lightgbm import LGBMClassifier
lgbm_params = {
               'max_depth': 9, 'min_child_samples': 14, 
               'learning_rate': 0.034869481921747415, 
               'n_estimators': 274, 'min_child_weight': 9, 
               'subsample': 0.7717873512945741, 
               'colsample_bytree': 0.1702910221565107, 
               'reg_alpha': 0.10626128775335533, 
               'reg_lambda': 0.624196407787772, 
               'random_state': 42}
model = LGBMClassifier(**lgbm_params).fit(X_train,y_train)
[LightGBM] [Warning] Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves. (num_leaves=31).
[LightGBM] [Warning] Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves. (num_leaves=31).
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000735 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3106
[LightGBM] [Info] Number of data points in the train set: 6324, number of used features: 35
[LightGBM] [Info] Start training from score -1.084013
[LightGBM] [Info] Start training from score -0.464579
[LightGBM] [Info] Start training from score -3.400249
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
... (the same warning repeats for the remaining boosting iterations)
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
In [44]:
y_hat = model.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)

# Log Loss: 0.4329097797154753

#Mean Over all folds : 0.43297852006166754
[LightGBM] [Warning] Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves. (num_leaves=31).
Log Loss: 0.4379695263416751
In [ ]:
# model = XGBClassifier(**xgb_params)
# cv = KFold(n_splits=5, shuffle=True, random_state=42)
# val_logloss = []
# models = []
# for i, (train_index,test_index) in enumerate(cv.split(X,y)):
#     X_train , X_val = X.iloc[train_index], X.iloc[test_index]
#     y_train, y_val = y.iloc[train_index], y.iloc[test_index]
#     model.fit(X_train,y_train,verbose=False)
#     models.append(model)
#     train_preds= model.predict_proba(X_train)
#     val_preds= model.predict_proba(X_val)
#     train_logloss = log_loss(y_train , train_preds)
#     test_logloss = log_loss(y_val , val_preds)
#     val_logloss.append(test_logloss)
#     print(f'Fold {i+1} \n')
#     print(f'Train logloss: {train_logloss}')
#     print(f'Validation logloss: {test_logloss}')
#     print('-------------------\n')
# print(f"Mean Over all folds : {np.mean(val_logloss)}")
In [ ]:
# model = LGBMClassifier(**lgbm_params)
# cv = KFold(n_splits=5, shuffle=True, random_state=42)
# val_logloss = []
# models = []
# for i, (train_index,test_index) in enumerate(cv.split(X,y)):
#     X_train , X_val = X.iloc[train_index], X.iloc[test_index]
#     y_train, y_val = y.iloc[train_index], y.iloc[test_index]
#     model.fit(X_train,y_train)
#     models.append(model)
#     train_preds= model.predict_proba(X_train)
#     val_preds= model.predict_proba(X_val)
#     train_logloss = log_loss(y_train , train_preds)
#     test_logloss = log_loss(y_val , val_preds)
#     val_logloss.append(test_logloss)
#     print(f'Fold {i+1} \n')
#     print(f'Train logloss: {train_logloss}')
#     print(f'Validation logloss: {test_logloss}')
#     print('-------------------\n')
# print(f"Mean Over all folds : {np.mean(val_logloss)}")
In [ ]:
from sklearn.ensemble import VotingClassifier
voting = VotingClassifier([('lgbm' , model) , ('xgb' , model3) ],voting = 'soft').fit(X_train,y_train)
In [ ]:
y_hat = voting.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
# Log Loss: 0.4256159895204896
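A quick sanity check on what soft voting does: with no weights it is just the mean of the base estimators' predict_proba outputs. A minimal sketch, assuming the fitted voting ensemble and X_test from the cells above:
In [ ]:
# Soft voting with equal weights is the plain mean of the fitted base estimators'
# class probabilities; compare against VotingClassifier.predict_proba.
import numpy as np

manual_proba = np.mean([est.predict_proba(X_test) for est in voting.estimators_], axis=0)
print(np.allclose(manual_proba, voting.predict_proba(X_test)))  # expected: True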
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
encoder = OrdinalEncoder()
Ord_enco = ['Drug',"Sex"]
AllData[Ord_enco] = encoder.fit_transform(AllData[Ord_enco])
In [ ]:
train = AllData.iloc[:n]
test = AllData.iloc[n:]
In [ ]:
X = train.drop("Status", axis=1)
# `dict` here is the label-encoding dictionary defined earlier in the notebook
# (it shadows the built-in `dict`); it maps the Status labels to integer classes.
y = y.map(dict)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
In [ ]:
X
In [ ]:
LazyClassifier().fit(X_train ,X_test , y_train,y_test)
In [ ]:
from lightgbm import LGBMClassifier
lgbm_params = {
               'max_depth': 9, 'min_child_samples': 14, 
               'learning_rate': 0.034869481921747415, 
               'n_estimators': 274, 'min_child_weight': 9, 
               'subsample': 0.7717873512945741, 
               'colsample_bytree': 0.1702910221565107, 
               'reg_alpha': 0.10626128775335533, 
               'reg_lambda': 0.624196407787772, 
               'random_state': 42}
model = LGBMClassifier(**lgbm_params).fit(X_train,y_train)
In [ ]:
y_hat = model.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
In [ ]:
 
In [ ]:
 
In [ ]:
# feature_importances = model.feature_importances_
# feature_names = model.feature_name_
# feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
# feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

# # Plot feature importances
# plt.figure(figsize=(10, 6))
# sns.barplot(x='Importance', y='Feature', data=feature_importance_df, palette='viridis')
# plt.title('LGBM - Feature Importances')
# plt.show()
In [ ]:
# selected_features = feature_importance_df[feature_importance_df['Importance'] > 30]

# # Plot feature importances
# plt.figure(figsize=(10, 6))
# sns.barplot(x='Importance', y='Feature', data=selected_features, palette='viridis')
# plt.title('LGBM - Selected Features with Importance > 30')
# plt.show()
In [ ]:
# selected_features.Feature
In [ ]:
# original_list = selected_features.Feature

# modified_list = [string.replace('_', ' ') for string in original_list]

# X_copy = X[['N_Days', 'Alk_Phos', 'Tryglicerides', 'Albumin', 'Bilirubin', 'Prothrombin', 'Age', 'Platelets', 'Cholesterol', 'SGOT', 'Copper', 'Age_y', 'Desease_count', 'N_Days mean by Stage', 'Cholesterol mean by Stage', 'Bilirubin mean by Drug', 'Copper std by Stage', 'N_Days mean by Drug', 'Alk_Phos std by Stage', 'Bilirubin std by Hepatomegaly', 'Bilirubin mean by Stage', 'Albumin std by Stage', 'Bilirubin std by Spiders', 'Cholesterol mean by Drug', 'Prothrombin max by Stage', 'Bilirubin std by Stage', 'Cholesterol std by Stage', 'SGOT std by Stage', 'Cholesterol std by Spiders', 'N_Days std by Spiders', 'N_Days std by Stage', 'Albumin std by Drug', 'N_Days mean by Sex', 'N_Days mean by Hepatomegaly', 'Tryglicerides std by Stage', 'Albumin mean by Spiders', 'Bilirubin mean by Sex', 'Albumin mean by Stage', 'Bilirubin mean by Hepatomegaly', 'SGOT mean by Stage', 'Cholesterol std by Hepatomegaly', 'N_Days std by Drug', 'Cholesterol std by Drug', 'N_Days std by Sex', 'Albumin std by Spiders', 'Copper std by Drug', 'Albumin mean by Hepatomegaly', 'Platelets mean by Stage', 'Bilirubin std by Edema', 'Prothrombin mean by Stage', 'N_Days max by Edema', 'Bilirubin std by Drug', 'Cholesterol max by Edema', 'Albumin mean by Drug', 'Copper mean by Hepatomegaly', 'Albumin max by Stage', 'Bilirubin std by Sex', 'Alk_Phos std by Drug', 'Cholesterol mean by Spiders', 'Bilirubin mean by Ascites', 'Copper mean by Spiders', 'Cholesterol std by Edema', 'Prothrombin min by Stage']]
In [ ]:
# X_train_copy,X_test_copy , y_train_copy,y_test_copy = train_test_split(X_copy, y ,test_size= 0.2)
In [ ]:
# from lightgbm import LGBMClassifier
# lgbm_params = {
#                'max_depth': 9, 'min_child_samples': 14, 
#                'learning_rate': 0.034869481921747415, 
#                'n_estimators': 274, 'min_child_weight': 9, 
#                'subsample': 0.7717873512945741, 
#                'colsample_bytree': 0.1702910221565107, 
#                'reg_alpha': 0.10626128775335533, 
#                'reg_lambda': 0.624196407787772, 
#                'random_state': 42}
# model = LGBMClassifier(**lgbm_params).fit(X_train_copy,y_train_copy)
In [ ]:
# y_hat = model.predict_proba(X_test_copy)
# classes = [0, 1, 2]
# logloss = log_loss(y_test, y_hat, labels=classes)

# print("Log Loss:", logloss)
In [ ]:
# Fit LazyClassifier on the existing split (it expects X_train, X_test, y_train, y_test)
# LazyClassifier().fit(X_train, X_test, y_train, y_test)
In [ ]:
model2 = XGBClassifier().fit(X_train,y_train)
In [ ]:
feature_importances = model2.feature_importances_
feature_names = model2.feature_names_in_
feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

# Plot feature importances
plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feature_importance_df, palette='viridis')
plt.title('XGBoost - Feature Importances')
plt.show()
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Assuming you have already calculated feature importances and created feature_importance_df

# Filter features with importance greater than 0
selected_features = feature_importance_df[feature_importance_df['Importance'] > 0]

# Plot feature importances
plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=selected_features, palette='viridis')
plt.title('XGB - Selected Features with Importance > 0')
plt.show()
In [ ]:
X = X[selected_features.Feature]
test = test[selected_features.Feature]
In [ ]:
X_train,X_test , y_train,y_test = train_test_split(X, y ,test_size= 0.2)
In [ ]:
# import optuna
# import xgboost as xgb
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import log_loss
# import numpy as np

# # Assuming you have X_train, y_train for your multiclass classification problem

# # Define the objective function for Optuna
# def objective(trial):
#     # Split the data into training and validation sets
#     X_valid = X_test
#     y_valid = y_test

#     # Define the XGBoost parameters to be optimized
#     params = {
#         'objective': 'multi:softmax',  # For multiclass classification
#         'num_class': len(np.unique(y_train)),  # Number of classes
#         'booster': trial.suggest_categorical('booster', ['gbtree', 'gblinear', 'dart']),
#         'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
#         'max_depth': trial.suggest_int('max_depth', 3, 10),
#         'subsample': trial.suggest_float('subsample', 0.5, 1.0),
#         'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
#         'lambda': trial.suggest_float('lambda', 1e-4, 1.0),
#         'alpha': trial.suggest_float('alpha', 1e-4, 1.0),
#         'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
#         'random_state': 42,
#         'n_jobs': -1
#     }

#     # Create and train the XGBoost model
#     model = xgb.XGBClassifier(**params)
#     model.fit(X_train, y_train)

#     # Predict on the validation set
#     y_pred = model.predict_proba(X_valid)

#     # Calculate log loss
#     loss = log_loss(y_valid, y_pred)

#     return loss

# # Create a study object and optimize the objective function
# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=100)

# # Print the best parameters and their values
# print('Number of finished trials: ', len(study.trials))
# print('Best trial:')
# trial = study.best_trial

# print('Value: ', trial.value)
# print('Params: ')
# for key, value in trial.params.items():
#     print(f'    {key}: {value}')
In [ ]:
xgb_params ={'max_depth': 10,
         'min_child_weight': 7,
         'learning_rate': 0.03419253503641095,
         'n_estimators': 472, 
         'subsample': 0.8843005833909504,
         'colsample_bytree': 0.0966352677605082,
         'random_state': 42, 
         'tree_method': 'hist', 
        'eval_metric': 'mlogloss', 
          'device' : 'cuda',
        'verbosity': 2, }




model3 = XGBClassifier(**xgb_params).fit(X_train,y_train)
y_hat = model3.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
#Log Loss: 0.4349277405119213
#Log Loss: 0.4323600045757931
In [ ]:
import optuna
from sklearn.ensemble import ExtraTreesClassifier

def objective(trial):
    # Split the data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define hyperparameters to be optimized
    n_estimators = trial.suggest_int('n_estimators', 50, 500)
    max_depth = trial.suggest_int('max_depth', 5, 30)
    min_samples_split = trial.suggest_float('min_samples_split', 0.1, 1.0)
    min_samples_leaf = trial.suggest_float('min_samples_leaf', 0.1, 0.5)

    # Create ExtraTreesClassifier with suggested hyperparameters
    model = ExtraTreesClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42
    )

    # Fit the model on training data
    model.fit(X_train, y_train)

    # Predict probabilities for the validation set
    y_val_pred_proba = model.predict_proba(X_val)

    # Calculate log loss
    loss = log_loss(y_val, y_val_pred_proba)

    return loss

# Assuming X and y are your feature matrix and target vector
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# Get the best parameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train the model with the best parameters on the entire dataset
best_model = ExtraTreesClassifier(**best_params, random_state=42)
best_model.fit(X, y)
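The objective above scores each trial on a single hold-out split, so the search can overfit that one split. A hedged sketch of a cross-validated variant, using the same search space and the X and y defined above, with scikit-learn's cross_val_score and the neg_log_loss scorer:
In [ ]:
# Sketch: cross-validated Optuna objective for ExtraTreesClassifier.
# Same hyperparameter ranges as the cell above, but each trial is scored
# with 5-fold mean log loss instead of one train/validation split.
import optuna
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score

def cv_objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 5, 30),
        'min_samples_split': trial.suggest_float('min_samples_split', 0.1, 1.0),
        'min_samples_leaf': trial.suggest_float('min_samples_leaf', 0.1, 0.5),
        'random_state': 42,
    }
    model = ExtraTreesClassifier(**params)
    # neg_log_loss is negated so that larger is better; flip the sign back for minimization.
    scores = cross_val_score(model, X, y, cv=5, scoring='neg_log_loss')
    return -scores.mean()

# study = optuna.create_study(direction='minimize')
# study.optimize(cv_objective, n_trials=100)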
In [ ]:
from sklearn.ensemble import ExtraTreesClassifier
Extra = ExtraTreesClassifier(
    **{'n_estimators': 362, 'max_depth': 21,
       'min_samples_split': 0.12597324652958813,
       'min_samples_leaf': 0.10043794568972335}
).fit(X_train, y_train)
y_hat = Extra.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
In [ ]:
# other_xdgb_para "{'max_depth': 10,
#          'min_child_weight': 7,
#          'learning_rate': 0.03419253503641095,
#          'n_estimators': 472, 
#          'subsample': 0.8843005833909504,
#          'colsample_bytree': 0.0966352677605082,
#          'random_state': 42, 
#          'tree_method': 'hist', 
#         'eval_metric': 'mlogloss', 
#           'device' : 'cuda',
#         'verbosity': 2, }"
In [ ]:
from lightgbm import LGBMClassifier
lgbm_params = {
               'max_depth': 9, 'min_child_samples': 14, 
               'learning_rate': 0.034869481921747415, 
               'n_estimators': 274, 'min_child_weight': 9, 
               'subsample': 0.7717873512945741, 
               'colsample_bytree': 0.1702910221565107, 
               'reg_alpha': 0.10626128775335533, 
               'reg_lambda': 0.624196407787772, 
               'random_state': 42}
model = LGBMClassifier(**lgbm_params).fit(X_train,y_train)
In [ ]:
y_hat = model.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)

# Log Loss: 0.4329097797154753
In [ ]:
from sklearn.ensemble import RandomForestClassifier
model_tree = RandomForestClassifier(n_estimators=250 , max_depth= 10 ,).fit(X_train,y_train)
In [ ]:
y_hat = model_tree.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
In [ ]:
import optuna
def objective(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 50, 1000),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),
        'depth': trial.suggest_int('depth', 2, 12),
        'l2_leaf_reg': trial.suggest_loguniform('l2_leaf_reg', 1e-3, 10),
        'random_strength': trial.suggest_loguniform('random_strength', 1e-3, 10)
    }

    # Create CatBoost classifier with the suggested parameters
    clf = CatBoostClassifier(**params)

    # Fit the model
    clf.fit(X_train, y_train, verbose=False)

    # Make predictions on the test set
    y_pred_proba = clf.predict_proba(X_test)

    # Calculate log loss
    logloss = log_loss(y_test, y_pred_proba)

    return logloss

# Create a study object and optimize the objective function
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Get the best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train the final model with the best hyperparameters
best_clf = CatBoostClassifier(**best_params)
best_clf.fit(X_train, y_train)

# Make predictions on the test set
final_pred_proba = best_clf.predict_proba(X_test)

# Calculate log loss on the test set
final_logloss = log_loss(y_test, final_pred_proba)
print("Final Log Loss on Test Set:", final_logloss)
In [ ]:
cat_params = {'iterations': 469,
                   'depth': 20, 
                   'min_data_in_leaf': 11,
                   'learning_rate': 0.13812945166006543, 
                   'grow_policy': 'Lossguide',
                   'bootstrap_type' : 'Bernoulli'}
cat = CatBoostClassifier(**cat_params).fit(X_train,y_train)
In [ ]:
y_hat = cat.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
In [ ]:
# 'multi:softprob' replaces 'multi_logloss', which is a LightGBM metric name, not a valid XGBoost objective.
xgb2_params = {'objective': 'multi:softprob', 'max_depth': 9, 'min_child_weight': 8, 'learning_rate': 0.0337716365315986, 'n_estimators': 733, 'subsample': 0.6927955384688348, 'colsample_bytree': 0.1234702658812108, 'reg_alpha': 0.18561628377665318, 'reg_lambda': 0.5565488299127089, 'random_state': 42}
xgb2 = XGBClassifier(**xgb2_params).fit(X_train,y_train)
y_hat = xgb2.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
In [ ]:
# 'multiclass' replaces 'multi_logloss', which is a LightGBM metric name, not a valid objective.
lgb2_params = {'objective': 'multiclass', 'max_depth': 8, 'min_child_samples': 16, 'learning_rate': 0.014553931721109505, 'n_estimators': 779, 'min_child_weight': 9, 'subsample': 0.44799071313755495, 'colsample_bytree': 0.15868021337418978, 'reg_alpha': 0.17992542471160344, 'reg_lambda': 0.8231621177994548, 'random_state': 42}
lgb2 = LGBMClassifier(**lgb2_params).fit(X_train,y_train)
y_hat = lgb2.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
In [ ]:
 
In [ ]:
 
In [ ]:
xgb2 = XGBClassifier(**xgb2_params)
In [ ]:
from sklearn.ensemble import HistGradientBoostingClassifier
import optuna

def objective(trial):
    # Define hyperparameters to optimize
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1.0, log=True)
    max_iter = trial.suggest_int('max_iter', 50, 500)
    max_depth = trial.suggest_int('max_depth', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20)
    max_leaf_nodes = trial.suggest_int('max_leaf_nodes', 2, 50)

    # Create and train the HistGradientBoostingClassifier
    model = HistGradientBoostingClassifier(
        learning_rate=learning_rate,
        max_iter=max_iter,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        max_leaf_nodes=max_leaf_nodes,
        random_state=42
    )

    model.fit(X_train, y_train)

    # Make probabilistic predictions on the test set for log loss calculation
    y_pred_proba = model.predict_proba(X_test)

    # Calculate log loss
    logloss = log_loss(y_test, y_pred_proba)

    return logloss

# Create a study and optimize the objective function
study = optuna.create_study(direction='minimize')  # Note the direction is 'minimize' for log loss
study.optimize(objective, n_trials=100)

# Get the best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train the final model with the best hyperparameters
best_model = HistGradientBoostingClassifier(**best_params, random_state=42)
best_model.fit(X_train, y_train)

# Make probabilistic predictions on the test set for log loss calculation
final_predictions_proba = best_model.predict_proba(X_test)

# Calculate log loss of the final model on the test set
final_logloss = log_loss(y_test, final_predictions_proba)

print("Final Log Loss on Test Set:", final_logloss)
In [ ]:
from sklearn.ensemble import HistGradientBoostingClassifier

hist_params = {'l2_regularization': 8.876168706639714,
                                        'early_stopping': False,
                                        'learning_rate': 0.009956485590638034,
                                        'max_iter': 500,
                                        'max_depth': 16,
                                        'max_bins': 255,
                                        'min_samples_leaf': 16,
                                        'max_leaf_nodes': 18,
                                        'random_state': 3}

hist = HistGradientBoostingClassifier(**hist_params).fit(X_train,y_train)
print(log_loss(y_test , hist.predict_proba(X_test)))
In [ ]:
 
In [ ]:
from sklearn.ensemble import VotingClassifier
voting = VotingClassifier([('lgbm' , model) , ('xgb' , model3) ,  ('cat' , cat)],voting = 'soft').fit(X_train,y_train)
In [ ]:
y_hat = voting.predict_proba(X_test)
classes = [0, 1, 2]
logloss = log_loss(y_test, y_hat, labels=classes)

print("Log Loss:", logloss)
# Log Loss: 0.4256159895204896
In [ ]:
val_logloss = []
cv = KFold(n_splits=5, shuffle=True, random_state=42)

models = []
for i, (train_index,test_index) in enumerate(cv.split(X,y)):
    X_train , X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]
    voting.fit(X_train,y_train)
    models.append(voting)
    train_preds= voting.predict_proba(X_train)
    val_preds= voting.predict_proba(X_val)
    train_logloss = log_loss(y_train , train_preds)
    test_logloss = log_loss(y_val , val_preds)
    val_logloss.append(test_logloss)
    print(f'Fold {i+1} \n')
    print(f'Train logloss: {train_logloss}')
    print(f'Validation logloss: {test_logloss}')
    print('-------------------\n')
print(f"Mean Over all folds : {np.mean(val_logloss)}")
 
#Mean Over all folds : 0.428706267095177  for 3
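One caveat about the loop above: models.append(voting) appends five references to the same estimator object, so after the loop every entry holds only the final fold's fit. If per-fold models are wanted (for example to average their predictions on the test set later), each fold should fit a fresh copy. A minimal sketch with sklearn.base.clone, assuming the cv, X, y, and voting objects from the cells above:
In [ ]:
# Sketch: keep an independently fitted ensemble per fold instead of five
# references to the same refit object.
from sklearn.base import clone

fold_models = []
for train_index, test_index in cv.split(X, y):
    fold_est = clone(voting)                              # unfitted copy with the same params
    fold_est.fit(X.iloc[train_index], y.iloc[train_index])
    fold_models.append(fold_est)
# fold_models now holds one fitted ensemble per fold, e.g. for averaging on `test`.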
In [ ]:
print(type(model3) , type(model) , type(cat))
In [ ]:
from sklearn.metrics import confusion_matrix, f1_score

models = [model3, model, cat, hist, voting]
# One subplot per model (the original hard-coded 4 axes for 5 models, which raises an IndexError).
fig, ax = plt.subplots(1, len(models), figsize=(25, 4))
# Use a separate loop variable so the fitted LGBM stored in `model` is not overwritten.
for i, clf in enumerate(models):
    y_pred_proba = clf.predict_proba(X_test)
    y_pred = clf.predict(X_test)
    f1 = f1_score(y_test, y_pred, average='weighted')
    logloss = log_loss(y_test, y_pred_proba)
    conf_matrix = confusion_matrix(y_test, y_pred)
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes, ax=ax[i])

    ax[i].set_title(type(clf).__name__)
    ax[i].set_xlabel('Predicted')
    ax[i].set_ylabel('Actual')
    print(f'f1_score for {type(clf).__name__} : {f1}')
    print(f'log_loss for {type(clf).__name__} : {logloss}')
    print('-' * 100)
fig.tight_layout()
fig.show()
In [ ]:
import optuna

def objective(trial):
    lgb_weight = trial.suggest_int('lgb_weight', 0, 50)
    xgb_weight = trial.suggest_int('xgb_weight', 0, 100 - lgb_weight)
    cb_weight = 100 - lgb_weight - xgb_weight

    weights = [lgb_weight / 100, xgb_weight / 100, cb_weight / 100]

    # Pass the trial's weights to the ensemble (they were computed but unused before),
    # and fit only on the training split so X_test stays a clean hold-out.
    ensemble = VotingClassifier(
        [('lgbm', model), ('xgb', model3), ('cat', cat)],
        voting='soft',
        weights=weights,
    ).fit(X_train, y_train)

    y_pred = ensemble.predict(X_test)
    return f1_score(y_test, y_pred, average='weighted')


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

# Get the best parameters
best_params = study.best_params
best_weights = [best_params['lgb_weight'], best_params['xgb_weight'], 100- best_params['lgb_weight'] - best_params['xgb_weight']]

print("Best Weights:", best_weights)
In [ ]:
from sklearn.ensemble import VotingClassifier


Ensemble = VotingClassifier([('lgbm' , model) , ('xgb' , model3) ,  ('cat' , cat)],voting = 'soft', weights = [32, 46, 22]).fit(X_train,y_train)
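A note on the weights: VotingClassifier averages with np.average, which normalizes by the sum of the weights, so [32, 46, 22] and [0.32, 0.46, 0.22] yield identical soft-vote probabilities. A quick check, assuming the fitted Ensemble and X_test from the cells above:
In [ ]:
# Verify that rescaled weights give the same soft-vote probabilities, without refitting:
# average the fitted base estimators' probabilities with the normalized weights.
import numpy as np

per_model_proba = np.asarray([est.predict_proba(X_test) for est in Ensemble.estimators_])
manual = np.average(per_model_proba, axis=0, weights=[0.32, 0.46, 0.22])
print(np.allclose(manual, Ensemble.predict_proba(X_test)))  # expected: True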
In [ ]:
%matplotlib inline

y_pred = Ensemble.predict(X_test)
y_pred_proba = Ensemble.predict_proba(X_test)
f1 = f1_score(y_test, y_pred, average='weighted')
print("F1 Score:", f1)

logloss = log_loss(y_test, y_pred_proba)
print("Log Loss:", logloss)

conf_matrix_2 = confusion_matrix(y_test, y_pred)

sns.heatmap(conf_matrix_2, annot=True ,fmt= 'd')
plt.show()
In [ ]:
val_logloss = []
cv = KFold(n_splits=5, shuffle=True, random_state=42)

models = []
for i, (train_index,test_index) in enumerate(cv.split(X,y)):
    X_train , X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]
    Ensemble.fit(X_train,y_train)
    models.append(Ensemble)
    train_preds= Ensemble.predict_proba(X_train)
    val_preds= Ensemble.predict_proba(X_val)
    train_logloss = log_loss(y_train , train_preds)
    test_logloss = log_loss(y_val , val_preds)
    val_logloss.append(test_logloss)
    print(f'Fold {i+1} \n')
    print(f'Train logloss: {train_logloss}')
    print(f'Validation logloss: {test_logloss}')
    print('-------------------\n')
print(f"Mean Over all folds : {np.mean(val_logloss)}")
# Mean Over all folds : 0.4270813308353992 
#Mean Over all folds : 0.428706267095177  for 3
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
y_pred = voting.predict_proba(test)
In [ ]:
from scipy.optimize import minimize

catboost_pred_proba = cat.predict_proba(X_test)
xgboost_pred_proba = model3.predict_proba(X_test)
model6_pred_proba = model.predict_proba(X_test)

# Define the objective function for optimization
def objective(weights):
    blended_pred_proba = (
        weights[0] * catboost_pred_proba +
        weights[1] * xgboost_pred_proba +
        weights[2] * model6_pred_proba
    )
    blend_log_loss = log_loss(y_test, blended_pred_proba)
    return blend_log_loss

# Minimize the blend log loss over the three weights (L-BFGS-B with box bounds only;
# note the weights are not constrained to sum to 1, so the blend can rescale the probabilities)
result = minimize(objective, [1/3, 1/3, 1/3], bounds=[(0, 1), (0, 1), (0, 1)], method='L-BFGS-B')

# Get the best weights
best_weights = result.x
print(f'Best Weights: {best_weights}')

# Blend predictions with the best weights
final_blended_pred_proba = (
    best_weights[0] * catboost_pred_proba +
    best_weights[1] * xgboost_pred_proba +
    best_weights[2] * model6_pred_proba
)

# Calculate log loss on the validation set with the best weights
final_blend_log_loss = log_loss(y_test, final_blended_pred_proba)
print(f'Final Blended Log Loss: {final_blend_log_loss}')
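Because the search above only box-constrains each weight to [0, 1], the optimum need not lie on the probability simplex. A hedged sketch that adds a sum-to-one equality constraint with SLSQP, reusing the objective function and hold-out probabilities defined in the cell above:
In [ ]:
# Sketch: same blending objective, but with weights constrained to be non-negative
# and to sum to 1 (an equality constraint handled by SLSQP).
from scipy.optimize import minimize

constraint = {'type': 'eq', 'fun': lambda w: w.sum() - 1.0}
result_simplex = minimize(
    objective,                      # blend log-loss function from the cell above
    x0=[1/3, 1/3, 1/3],
    bounds=[(0, 1)] * 3,
    constraints=[constraint],
    method='SLSQP',
)
print('Simplex-constrained weights:', result_simplex.x)
print('Blended log loss with constrained weights:', objective(result_simplex.x))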
In [ ]:
model = XGBClassifier(**xgb_params)
cv = KFold(n_splits=5, shuffle=True, random_state=42)
val_logloss = []
models = []
for i, (train_index,test_index) in enumerate(cv.split(X,y)):
    X_train , X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]
    model.fit(X_train,y_train,verbose=False)
    models.append(model)
    train_preds= model.predict_proba(X_train)
    val_preds= model.predict_proba(X_val)
    train_logloss = log_loss(y_train , train_preds)
    test_logloss = log_loss(y_val , val_preds)
    val_logloss.append(test_logloss)
    print(f'Fold {i+1} \n')
    print(f'Train logloss: {train_logloss}')
    print(f'Validation logloss: {test_logloss}')
    print('-------------------\n')
print(f"Mean Over all folds : {np.mean(val_logloss)}")
In [ ]:
model = XGBClassifier(**xgb2_params)
cv = KFold(n_splits=5, shuffle=True, random_state=42)
val_logloss = []
models = []
for i, (train_index,test_index) in enumerate(cv.split(X,y)):
    X_train , X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]
    model.fit(X_train,y_train,verbose=False)
    models.append(model)
    train_preds= model.predict_proba(X_train)
    val_preds= model.predict_proba(X_val)
    train_logloss = log_loss(y_train , train_preds)
    test_logloss = log_loss(y_val , val_preds)
    val_logloss.append(test_logloss)
    print(f'Fold {i+1} \n')
    print(f'Train logloss: {train_logloss}')
    print(f'Validation logloss: {test_logloss}')
    print('-------------------\n')
print(f"Mean Over all folds : {np.mean(val_logloss)}")
In [ ]:
model = CatBoostClassifier(**cat_params)
cv = KFold(n_splits=5, shuffle=True, random_state=42)
val_logloss = []
models = []
for i, (train_index,test_index) in enumerate(cv.split(X,y)):
    X_train , X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]
    model.fit(X_train,y_train,verbose=False)
    models.append(model)
    train_preds= model.predict_proba(X_train)
    val_preds= model.predict_proba(X_val)
    train_logloss = log_loss(y_train , train_preds)
    test_logloss = log_loss(y_val , val_preds)
    val_logloss.append(test_logloss)
    print(f'Fold {i+1} \n')
    print(f'Train logloss: {train_logloss}')
    print(f'Validation logloss: {test_logloss}')
    print('-------------------\n')
print(f"Mean Over all folds : {np.mean(val_logloss)}")
In [ ]:
from sklearn.base import clone

base_model = LGBMClassifier(**lgbm_params)
cv = KFold(n_splits=5, shuffle=True, random_state=42)
val_logloss = []
models = []
for i, (train_index, test_index) in enumerate(cv.split(X, y)):
    X_train, X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]
    # Fit a fresh clone per fold so `models` keeps five independent fits;
    # appending the same refit estimator would leave only the last fold's model.
    model = clone(base_model)
    model.fit(X_train, y_train)
    models.append(model)
    train_preds= model.predict_proba(X_train)
    val_preds= model.predict_proba(X_val)
    train_logloss = log_loss(y_train , train_preds)
    test_logloss = log_loss(y_val , val_preds)
    val_logloss.append(test_logloss)
    print(f'Fold {i+1} \n')
    print(f'Train logloss: {train_logloss}')
    print(f'Validation logloss: {test_logloss}')
    print('-------------------\n')
print(f"Mean Over all folds : {np.mean(val_logloss)}")
In [ ]:
y_pred = np.array([model.predict_proba(test)/5 for model in models]).sum(axis=0)
In [ ]:
log_loss(y_test , model3.predict_proba(X_test))
In [ ]:
y_pred = np.array([model.predict_proba(test)/5 for model in models]).sum(axis=0)
In [ ]:
y_pred = Ensemble.predict_proba(test)
In [ ]:
sub['Status_D'] = y_pred[:,0]
sub['Status_C'] = y_pred[:,1]
sub['Status_CL'] = y_pred[:,2]
In [ ]:
sub.to_csv('Cirrhosis5.csv',index=False)
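The three Status_* columns above are filled by hard-coded column positions in y_pred. The column order of predict_proba follows the estimator's classes_ attribute, so mapping through it makes the write-out robust to the label encoding. A minimal sketch; the integer-to-label mapping {0: 'D', 1: 'C', 2: 'CL'} below simply mirrors the order used in the cell above and is an assumption of this sketch:
In [ ]:
# Sketch: fill the submission columns from Ensemble.classes_ instead of fixed positions.
# Assumes y_pred = Ensemble.predict_proba(test) as in the cells above.
class_to_status = {0: 'Status_D', 1: 'Status_C', 2: 'Status_CL'}  # assumed encoding
for col_idx, cls in enumerate(Ensemble.classes_):
    sub[class_to_status[cls]] = y_pred[:, col_idx]
sub.head()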
In [ ]:
from sklearn.inspection import PartialDependenceDisplay
In [ ]:
 
In [ ]:
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize = (15, 15))

fig.suptitle('Partial Dependence Plots of Status = D', fontsize = 16)

HG_disp = PartialDependenceDisplay.from_estimator(voting, X, ['Bilirubin mean by Drug', 'Prothrombin', 'Alk_Phos'],
                                                  pd_line_kw = {"color": "red"},
                                                  ice_lines_kw = {"color": "steelblue"},
                                                  kind = 'both', 
                                                  target = 0,
                                                  response_method = 'predict_proba',
                                                  ax = ax1)
ax1.set_title('voting Partial Dependency Plots')

LGBM_disp = PartialDependenceDisplay.from_estimator(model, X, ['Bilirubin mean by Drug', 'Prothrombin', 'Alk_Phos'],
                                                    pd_line_kw = {"color": "red"},
                                                    ice_lines_kw = {"color": "steelblue"},
                                                    kind = 'both', 
                                                    target = 0,
                                                    response_method = 'predict_proba',
                                                    ax = ax2)
ax2.set_title('LGBM Partial Dependency Plots')

XGB_disp = PartialDependenceDisplay.from_estimator(model3, X, ['Bilirubin mean by Drug', 'Prothrombin', 'Alk_Phos'],
                                                   pd_line_kw = {"color": "red"},
                                                   ice_lines_kw = {"color": "steelblue"},
                                                   kind = 'both', 
                                                   target = 0,
                                                   response_method = 'predict_proba',
                                                   ax = ax3)
ax3.set_title('XGBoost Partial Dependency Plots')
plt.savefig('D_partial_dependency_plots.png');
In [ ]:
y.value_counts()
In [ ]:
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize = (15, 15))

fig.suptitle('Partial Dependence Plots of Status = C', fontsize = 16)

HG_disp = PartialDependenceDisplay.from_estimator(voting, X, ['Bilirubin mean by Drug', 'Prothrombin', 'Alk_Phos'],
                                                  pd_line_kw = {"color": "red"},
                                                  ice_lines_kw = {"color": "steelblue"},
                                                  kind = 'both', 
                                                  target = 1,
                                                  response_method = 'predict_proba',
                                                  ax = ax1)
ax1.set_title('voting Partial Dependency Plots')

LGBM_disp = PartialDependenceDisplay.from_estimator(model, X, ['Bilirubin mean by Drug', 'Prothrombin', 'Alk_Phos'],
                                                    pd_line_kw = {"color": "red"},
                                                    ice_lines_kw = {"color": "steelblue"},
                                                    kind = 'both', 
                                                    target = 1,
                                                    response_method = 'predict_proba',
                                                    ax = ax2)
ax2.set_title('LGBM Partial Dependency Plots')

XGB_disp = PartialDependenceDisplay.from_estimator(model3, X, ['Bilirubin mean by Drug', 'Prothrombin', 'Alk_Phos'],
                                                   pd_line_kw = {"color": "red"},
                                                   ice_lines_kw = {"color": "steelblue"},
                                                   kind = 'both', 
                                                   target = 1,
                                                   response_method = 'predict_proba',
                                                   ax = ax3)
ax3.set_title('XGBoost Partial Dependency Plots')
plt.savefig('C_partial_dependency_plots.png');
In [ ]:
X.columns
In [ ]:
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize = (15, 15))

fig.suptitle('Partial Dependence Plots of Status = D', fontsize = 16)

HG_disp = PartialDependenceDisplay.from_estimator(voting, X, [ 'SGOT', 'Copper', 'Platelets'],
                                                  pd_line_kw = {"color": "red"},
                                                  ice_lines_kw = {"color": "steelblue"},
                                                  kind = 'both', 
                                                  target = 0,
                                                  response_method = 'predict_proba',
                                                  ax = ax1)
ax1.set_title('voting Partial Dependency Plots')

LGBM_disp = PartialDependenceDisplay.from_estimator(model, X,[ 'SGOT', 'Copper', 'Platelets'],
                                                    pd_line_kw = {"color": "red"},
                                                    ice_lines_kw = {"color": "steelblue"},
                                                    kind = 'both', 
                                                    target = 0,
                                                    response_method = 'predict_proba',
                                                    ax = ax2)
ax2.set_title('LGBM Partial Dependency Plots')

XGB_disp = PartialDependenceDisplay.from_estimator(model3, X, [ 'SGOT', 'Copper', 'Platelets'],
                                                   pd_line_kw = {"color": "red"},
                                                   ice_lines_kw = {"color": "steelblue"},
                                                   kind = 'both', 
                                                   target = 0,
                                                   response_method = 'predict_proba',
                                                   ax = ax3)
ax3.set_title('XGBoost Partial Dependency Plots')
plt.savefig('idk.png');
In [ ]: